import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as mtick
plt.style.use('fivethirtyeight')
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
os.chdir ('C:\\Users\\win 10\\Documents\\End To End Project\\Data Science Project\\ZOMATO RATING\\')
os.getcwd()
'C:\\Users\\win 10\\Documents\\End To End Project\\Data Science Project\\ZOMATO RATING'
data = pd.read_csv('zomato.csv')
display(data)
| url | address | name | online_order | book_table | rate | votes | phone | location | rest_type | dish_liked | cuisines | approx_cost(for two people) | reviews_list | menu_item | listed_in(type) | listed_in(city) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | https://www.zomato.com/bangalore/jalsa-banasha... | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | Yes | Yes | 4.1/5 | 775 | 080 42297555\r\n+91 9743772233 | Banashankari | Casual Dining | Pasta, Lunch Buffet, Masala Papad, Paneer Laja... | North Indian, Mughlai, Chinese | 800 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | https://www.zomato.com/bangalore/spice-elephan... | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | Yes | No | 4.1/5 | 787 | 080 41714161 | Banashankari | Casual Dining | Momos, Lunch Buffet, Chocolate Nirvana, Thai G... | Chinese, North Indian, Thai | 800 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | https://www.zomato.com/SanchurroBangalore?cont... | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | Yes | No | 3.8/5 | 918 | +91 9663487993 | Banashankari | Cafe, Casual Dining | Churros, Cannelloni, Minestrone Soup, Hot Choc... | Cafe, Mexican, Italian | 800 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | https://www.zomato.com/bangalore/addhuri-udupi... | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | No | No | 3.7/5 | 88 | +91 9620009302 | Banashankari | Quick Bites | Masala Dosa | South Indian, North Indian | 300 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | https://www.zomato.com/bangalore/grand-village... | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | No | No | 3.8/5 | 166 | +91 8026612447\r\n+91 9901210005 | Basavanagudi | Casual Dining | Panipuri, Gol Gappe | North Indian, Rajasthani | 600 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 51712 | https://www.zomato.com/bangalore/best-brews-fo... | Four Points by Sheraton Bengaluru, 43/3, White... | Best Brews - Four Points by Sheraton Bengaluru... | No | No | 3.6 /5 | 27 | 080 40301477 | Whitefield | Bar | NaN | Continental | 1,500 | [('Rated 5.0', "RATED\n Food and service are ... | [] | Pubs and bars | Whitefield |
| 51713 | https://www.zomato.com/bangalore/vinod-bar-and... | Number 10, Garudachar Palya, Mahadevapura, Whi... | Vinod Bar And Restaurant | No | No | NaN | 0 | +91 8197675843 | Whitefield | Bar | NaN | Finger Food | 600 | [] | [] | Pubs and bars | Whitefield |
| 51714 | https://www.zomato.com/bangalore/plunge-sherat... | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Plunge - Sheraton Grand Bengaluru Whitefield H... | No | No | NaN | 0 | NaN | Whitefield | Bar | NaN | Finger Food | 2,000 | [] | [] | Pubs and bars | Whitefield |
| 51715 | https://www.zomato.com/bangalore/chime-sherato... | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Chime - Sheraton Grand Bengaluru Whitefield Ho... | No | Yes | 4.3 /5 | 236 | 080 49652769 | ITPL Main Road, Whitefield | Bar | Cocktails, Pizza, Buttermilk | Finger Food | 2,500 | [('Rated 4.0', 'RATED\n Nice and friendly pla... | [] | Pubs and bars | Whitefield |
| 51716 | https://www.zomato.com/bangalore/the-nest-the-... | ITPL Main Road, KIADB Export Promotion Industr... | The Nest - The Den Bengaluru | No | No | 3.4 /5 | 13 | +91 8071117272 | ITPL Main Road, Whitefield | Bar, Casual Dining | NaN | Finger Food, North Indian, Continental | 1,500 | [('Rated 5.0', 'RATED\n Great ambience , look... | [] | Pubs and bars | Whitefield |
51717 rows × 17 columns
display(data.shape)
(51717, 17)
#checking the data types
print (data.dtypes )
url object address object name object online_order object book_table object rate object votes int64 phone object location object rest_type object dish_liked object cuisines object approx_cost(for two people) object reviews_list object menu_item object listed_in(type) object listed_in(city) object dtype: object
#Checking null values
print (data.isna().sum())
url 0 address 0 name 0 online_order 0 book_table 0 rate 7775 votes 0 phone 1208 location 21 rest_type 227 dish_liked 28078 cuisines 45 approx_cost(for two people) 346 reviews_list 0 menu_item 0 listed_in(type) 0 listed_in(city) 0 dtype: int64
#Deleting Unnnecessary Columns
df=data.drop(['url','phone','dish_liked' ],axis=1) #Dropping the column like "phone" and "url" and saving the new dataset as "df
display(df.head())
| address | name | online_order | book_table | rate | votes | location | rest_type | cuisines | approx_cost(for two people) | reviews_list | menu_item | listed_in(type) | listed_in(city) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | Yes | Yes | 4.1/5 | 775 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | Yes | No | 4.1/5 | 787 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | Yes | No | 3.8/5 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | No | No | 3.7/5 | 88 | Banashankari | Quick Bites | South Indian, North Indian | 300 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | No | No | 3.8/5 | 166 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
# Check for Duplicate Records
df.duplicated().sum()
43
# Drop Duplicates
df.drop_duplicates(inplace=True)
display(df)
| address | name | online_order | book_table | rate | votes | location | rest_type | cuisines | approx_cost(for two people) | reviews_list | menu_item | listed_in(type) | listed_in(city) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | Yes | Yes | 4.1/5 | 775 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | Yes | No | 4.1/5 | 787 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | Yes | No | 3.8/5 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | No | No | 3.7/5 | 88 | Banashankari | Quick Bites | South Indian, North Indian | 300 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | No | No | 3.8/5 | 166 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 51712 | Four Points by Sheraton Bengaluru, 43/3, White... | Best Brews - Four Points by Sheraton Bengaluru... | No | No | 3.6 /5 | 27 | Whitefield | Bar | Continental | 1,500 | [('Rated 5.0', "RATED\n Food and service are ... | [] | Pubs and bars | Whitefield |
| 51713 | Number 10, Garudachar Palya, Mahadevapura, Whi... | Vinod Bar And Restaurant | No | No | NaN | 0 | Whitefield | Bar | Finger Food | 600 | [] | [] | Pubs and bars | Whitefield |
| 51714 | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Plunge - Sheraton Grand Bengaluru Whitefield H... | No | No | NaN | 0 | Whitefield | Bar | Finger Food | 2,000 | [] | [] | Pubs and bars | Whitefield |
| 51715 | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Chime - Sheraton Grand Bengaluru Whitefield Ho... | No | Yes | 4.3 /5 | 236 | ITPL Main Road, Whitefield | Bar | Finger Food | 2,500 | [('Rated 4.0', 'RATED\n Nice and friendly pla... | [] | Pubs and bars | Whitefield |
| 51716 | ITPL Main Road, KIADB Export Promotion Industr... | The Nest - The Den Bengaluru | No | No | 3.4 /5 | 13 | ITPL Main Road, Whitefield | Bar, Casual Dining | Finger Food, North Indian, Continental | 1,500 | [('Rated 5.0', 'RATED\n Great ambience , look... | [] | Pubs and bars | Whitefield |
51674 rows × 14 columns
#Remove all the NaN values records from the dataset
df.dropna(how='any',inplace=True)
display (df.isnull().sum())
address 0 name 0 online_order 0 book_table 0 rate 0 votes 0 location 0 rest_type 0 cuisines 0 approx_cost(for two people) 0 reviews_list 0 menu_item 0 listed_in(type) 0 listed_in(city) 0 dtype: int64
# Display Column Names
display(df.columns)
Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
'location', 'rest_type', 'cuisines', 'approx_cost(for two people)',
'reviews_list', 'menu_item', 'listed_in(type)', 'listed_in(city)'],
dtype='object')
# Rename Column Names
df = df.rename(columns={'approx_cost(for two people)':'cost','listed_in(type)':'type',
'listed_in(city)':'city'})
display (df.columns)
Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
'location', 'rest_type', 'cuisines', 'cost', 'reviews_list',
'menu_item', 'type', 'city'],
dtype='object')
display (df)
| address | name | online_order | book_table | rate | votes | location | rest_type | cuisines | cost | reviews_list | menu_item | type | city | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | Yes | Yes | 4.1/5 | 775 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | Yes | No | 4.1/5 | 787 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | Yes | No | 3.8/5 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | No | No | 3.7/5 | 88 | Banashankari | Quick Bites | South Indian, North Indian | 300 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | No | No | 3.8/5 | 166 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 51709 | 136, SAP Labs India, KIADB Export Promotion In... | The Farm House Bar n Grill | No | No | 3.7 /5 | 34 | Whitefield | Casual Dining, Bar | North Indian, Continental | 800 | [('Rated 4.0', 'RATED\n Ambience- Big and spa... | [] | Pubs and bars | Whitefield |
| 51711 | 139/C1, Next To GR Tech Park, Pattandur Agraha... | Bhagini | No | No | 2.5 /5 | 81 | Whitefield | Casual Dining, Bar | Andhra, South Indian, Chinese, North Indian | 800 | [('Rated 4.0', 'RATED\n A fine place to chill... | [] | Pubs and bars | Whitefield |
| 51712 | Four Points by Sheraton Bengaluru, 43/3, White... | Best Brews - Four Points by Sheraton Bengaluru... | No | No | 3.6 /5 | 27 | Whitefield | Bar | Continental | 1,500 | [('Rated 5.0', "RATED\n Food and service are ... | [] | Pubs and bars | Whitefield |
| 51715 | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Chime - Sheraton Grand Bengaluru Whitefield Ho... | No | Yes | 4.3 /5 | 236 | ITPL Main Road, Whitefield | Bar | Finger Food | 2,500 | [('Rated 4.0', 'RATED\n Nice and friendly pla... | [] | Pubs and bars | Whitefield |
| 51716 | ITPL Main Road, KIADB Export Promotion Industr... | The Nest - The Den Bengaluru | No | No | 3.4 /5 | 13 | ITPL Main Road, Whitefield | Bar, Casual Dining | Finger Food, North Indian, Continental | 1,500 | [('Rated 5.0', 'RATED\n Great ambience , look... | [] | Pubs and bars | Whitefield |
43499 rows × 14 columns
display (df['cost'].unique())
array(['800', '300', '600', '700', '550', '500', '450', '650', '400',
'900', '200', '750', '150', '850', '100', '1,200', '350', '250',
'950', '1,000', '1,500', '1,300', '199', '80', '1,100', '160',
'1,600', '230', '130', '1,700', '1,400', '1,350', '2,200', '2,000',
'1,800', '1,900', '180', '330', '2,500', '2,100', '3,000', '2,800',
'3,400', '50', '40', '1,250', '3,500', '4,000', '2,400', '2,600',
'1,450', '70', '3,200', '560', '240', '360', '6,000', '1,050',
'2,300', '4,100', '120', '5,000', '3,700', '1,650', '2,700',
'4,500'], dtype=object)
# Remove the comma separator in the number. Change the data type from string to number
#df["cost"] = df["cost"].replace(",", "", regex=True)df
df['cost'] = df['cost'].apply(lambda x: x.replace(',','')) #Using lambda function to replace ',' from cost
df['cost'] = df['cost'].astype(float)
display (df['cost'].unique())
array([ 800., 300., 600., 700., 550., 500., 450., 650., 400.,
900., 200., 750., 150., 850., 100., 1200., 350., 250.,
950., 1000., 1500., 1300., 199., 80., 1100., 160., 1600.,
230., 130., 1700., 1400., 1350., 2200., 2000., 1800., 1900.,
180., 330., 2500., 2100., 3000., 2800., 3400., 50., 40.,
1250., 3500., 4000., 2400., 2600., 1450., 70., 3200., 560.,
240., 360., 6000., 1050., 2300., 4100., 120., 5000., 3700.,
1650., 2700., 4500.])
display (df.dtypes)
address object name object online_order object book_table object rate object votes int64 location object rest_type object cuisines object cost float64 reviews_list object menu_item object type object city object dtype: object
#Reading unique values from the Rate column
df['rate'].unique()
array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
'3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
'4.3/5', 'NEW', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5',
'4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
'3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
'4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
'3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
'4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
'4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
'2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)
# Number of records related to each unique value
display(df['rate'].value_counts().head(60))
NEW 2197 3.9/5 2089 3.7/5 2008 3.8/5 1997 3.9 /5 1865 3.8 /5 1819 3.7 /5 1799 3.6/5 1753 4.0/5 1597 4.0 /5 1547 3.6 /5 1533 4.1/5 1469 4.1 /5 1456 3.5/5 1423 3.5 /5 1340 3.4/5 1247 3.4 /5 1197 3.3/5 1147 4.2 /5 1141 3.3 /5 1125 4.2/5 1013 3.2/5 997 4.3 /5 910 3.1/5 851 3.2 /5 847 4.3/5 772 3.1 /5 699 4.4 /5 627 3.0/5 543 4.4/5 519 3.0 /5 447 2.9/5 427 4.5 /5 409 2.9 /5 374 2.8/5 302 2.8 /5 278 4.5/5 247 4.6 /5 175 2.7/5 167 2.6/5 140 2.7 /5 136 4.6/5 125 2.6 /5 109 4.7 /5 86 4.7/5 81 - 65 2.5 /5 56 2.5/5 44 4.8 /5 43 2.4/5 36 4.9 /5 30 2.4 /5 30 2.3/5 28 4.9/5 25 2.3 /5 23 4.8/5 23 2.2/5 19 2.1 /5 13 2.1/5 11 2.2 /5 7 Name: rate, dtype: int64
#There is one Unique value "NEW", get rid of all records (147 records) with value new in the rate column
df = df.loc[df.rate !='NEW',]
display(df)
| address | name | online_order | book_table | rate | votes | location | rest_type | cuisines | cost | reviews_list | menu_item | type | city | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | Yes | Yes | 4.1/5 | 775 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800.0 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | Yes | No | 4.1/5 | 787 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800.0 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | Yes | No | 3.8/5 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | No | No | 3.7/5 | 88 | Banashankari | Quick Bites | South Indian, North Indian | 300.0 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | No | No | 3.8/5 | 166 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600.0 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 51709 | 136, SAP Labs India, KIADB Export Promotion In... | The Farm House Bar n Grill | No | No | 3.7 /5 | 34 | Whitefield | Casual Dining, Bar | North Indian, Continental | 800.0 | [('Rated 4.0', 'RATED\n Ambience- Big and spa... | [] | Pubs and bars | Whitefield |
| 51711 | 139/C1, Next To GR Tech Park, Pattandur Agraha... | Bhagini | No | No | 2.5 /5 | 81 | Whitefield | Casual Dining, Bar | Andhra, South Indian, Chinese, North Indian | 800.0 | [('Rated 4.0', 'RATED\n A fine place to chill... | [] | Pubs and bars | Whitefield |
| 51712 | Four Points by Sheraton Bengaluru, 43/3, White... | Best Brews - Four Points by Sheraton Bengaluru... | No | No | 3.6 /5 | 27 | Whitefield | Bar | Continental | 1500.0 | [('Rated 5.0', "RATED\n Food and service are ... | [] | Pubs and bars | Whitefield |
| 51715 | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Chime - Sheraton Grand Bengaluru Whitefield Ho... | No | Yes | 4.3 /5 | 236 | ITPL Main Road, Whitefield | Bar | Finger Food | 2500.0 | [('Rated 4.0', 'RATED\n Nice and friendly pla... | [] | Pubs and bars | Whitefield |
| 51716 | ITPL Main Road, KIADB Export Promotion Industr... | The Nest - The Den Bengaluru | No | No | 3.4 /5 | 13 | ITPL Main Road, Whitefield | Bar, Casual Dining | Finger Food, North Indian, Continental | 1500.0 | [('Rated 5.0', 'RATED\n Great ambience , look... | [] | Pubs and bars | Whitefield |
41302 rows × 14 columns
#There is one Unique value "-", get rid of all records (147 records) with value new in the rate column
df = df.loc[df.rate !='-']
display(df)
| address | name | online_order | book_table | rate | votes | location | rest_type | cuisines | cost | reviews_list | menu_item | type | city | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | Yes | Yes | 4.1/5 | 775 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800.0 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | Yes | No | 4.1/5 | 787 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800.0 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | Yes | No | 3.8/5 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | No | No | 3.7/5 | 88 | Banashankari | Quick Bites | South Indian, North Indian | 300.0 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | No | No | 3.8/5 | 166 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600.0 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 51709 | 136, SAP Labs India, KIADB Export Promotion In... | The Farm House Bar n Grill | No | No | 3.7 /5 | 34 | Whitefield | Casual Dining, Bar | North Indian, Continental | 800.0 | [('Rated 4.0', 'RATED\n Ambience- Big and spa... | [] | Pubs and bars | Whitefield |
| 51711 | 139/C1, Next To GR Tech Park, Pattandur Agraha... | Bhagini | No | No | 2.5 /5 | 81 | Whitefield | Casual Dining, Bar | Andhra, South Indian, Chinese, North Indian | 800.0 | [('Rated 4.0', 'RATED\n A fine place to chill... | [] | Pubs and bars | Whitefield |
| 51712 | Four Points by Sheraton Bengaluru, 43/3, White... | Best Brews - Four Points by Sheraton Bengaluru... | No | No | 3.6 /5 | 27 | Whitefield | Bar | Continental | 1500.0 | [('Rated 5.0', "RATED\n Food and service are ... | [] | Pubs and bars | Whitefield |
| 51715 | Sheraton Grand Bengaluru Whitefield Hotel & Co... | Chime - Sheraton Grand Bengaluru Whitefield Ho... | No | Yes | 4.3 /5 | 236 | ITPL Main Road, Whitefield | Bar | Finger Food | 2500.0 | [('Rated 4.0', 'RATED\n Nice and friendly pla... | [] | Pubs and bars | Whitefield |
| 51716 | ITPL Main Road, KIADB Export Promotion Industr... | The Nest - The Den Bengaluru | No | No | 3.4 /5 | 13 | ITPL Main Road, Whitefield | Bar, Casual Dining | Finger Food, North Indian, Continental | 1500.0 | [('Rated 5.0', 'RATED\n Great ambience , look... | [] | Pubs and bars | Whitefield |
41237 rows × 14 columns
# Display Unique Values
display (df['rate'].unique())
array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
'3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
'4.3/5', '2.9/5', '3.5/5', '2.6/5', '3.8 /5', '3.4/5', '4.5/5',
'2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5', '3.4 /5',
'3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5', '4.1 /5',
'3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5', '3.5 /5',
'2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5', '4.3 /5',
'4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5', '4.9 /5',
'3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5', '2.1 /5',
'2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)
#Removing '/5' from Rates
df['rate'] = df['rate'].apply(lambda x: x.replace('/5',''))
display (df['rate'].unique())
array(['4.1', '3.8', '3.7', '3.6', '4.6', '4.0', '4.2', '3.9', '3.1',
'3.0', '3.2', '3.3', '2.8', '4.4', '4.3', '2.9', '3.5', '2.6',
'3.8 ', '3.4', '4.5', '2.5', '2.7', '4.7', '2.4', '2.2', '2.3',
'3.4 ', '3.6 ', '4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ', '3.7 ',
'3.1 ', '2.9 ', '3.3 ', '2.8 ', '3.5 ', '2.7 ', '2.5 ', '3.2 ',
'2.6 ', '4.5 ', '4.3 ', '4.4 ', '4.9', '2.1', '2.0', '1.8', '4.6 ',
'4.9 ', '3.0 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ', '2.2 ',
'2.0 ', '1.8 '], dtype=object)
#Restaurants with number of outlets
df['name'].value_counts()
Cafe Coffee Day 86
Onesta 85
Empire Restaurant 69
Kanti Sweets 68
Five Star Chicken 68
..
Daafoodies 1
I Siri Restaurant And Caterers 1
The Foodware Veg 1
Hotel Thalassery 1
SeeYa Restaurant 1
Name: name, Length: 6602, dtype: int64
#Display the Top 20 restaurants
df['name'].value_counts().head(20)
Cafe Coffee Day 86 Onesta 85 Empire Restaurant 69 Kanti Sweets 68 Five Star Chicken 68 Just Bake 67 Baskin Robbins 62 Petoo 60 Pizza Hut 60 KFC 60 Smoor 59 McDonald's 59 Domino's Pizza 59 Subway 59 Sweet Truth 58 Polar Bear 58 Faasos 56 Beijing Bites 56 Burger King 55 Keventers 54 Name: name, dtype: int64
#Create a data frame with top 20 restaurants
chains=df['name'].value_counts()[:20]
display(chains)
Cafe Coffee Day 86 Onesta 85 Empire Restaurant 69 Kanti Sweets 68 Five Star Chicken 68 Just Bake 67 Baskin Robbins 62 Petoo 60 Pizza Hut 60 KFC 60 Smoor 59 McDonald's 59 Domino's Pizza 59 Subway 59 Sweet Truth 58 Polar Bear 58 Faasos 56 Beijing Bites 56 Burger King 55 Keventers 54 Name: name, dtype: int64
# Visualisations- Bar chat with Top 20 restaurants with number of outlets
plt.figure(figsize=(20,15))
chains=df['name'].value_counts()[:20]
sns.barplot(x=chains,y=chains.index,palette='deep')
plt.title("Most famous restaurants chains in Bangaluru")
plt.xlabel("Number of outlets")
plt.show()
#Restaurants with table booking
display(df['book_table'].value_counts())
No 34938 Yes 6299 Name: book_table, dtype: int64
plt.figure(figsize=(10,10))
sns.countplot(df['book_table'])
plt.title("Number of restaurants that have the option to book table",fontsize=25,color='purple')
plt.ylabel("Count",fontsize=20)
plt.xlabel("Book Table",fontsize=20)
plt.show()
#Restaurants delivering Online or not
sns.countplot(df['online_order'])
fig = plt.gcf()
fig.set_size_inches(10,10)
plt.title('Whether Restaurants deliver online or Not')
plt.show()
# Ratings distribution , most of the ratings are within 3.5 and 4.5
plt.figure(figsize=(15,7))
sns.distplot(df['rate'],bins=20)
<AxesSubplot:xlabel='rate', ylabel='Density'>
#Count of ratings as between "1 and 2", "2 and 3", "3 and 4", and "4 and 5"
# Display Unique Rating
display(df['rate'].unique())
array(['4.1', '3.8', '3.7', '3.6', '4.6', '4.0', '4.2', '3.9', '3.1',
'3.0', '3.2', '3.3', '2.8', '4.4', '4.3', '2.9', '3.5', '2.6',
'3.8 ', '3.4', '4.5', '2.5', '2.7', '4.7', '2.4', '2.2', '2.3',
'3.4 ', '3.6 ', '4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ', '3.7 ',
'3.1 ', '2.9 ', '3.3 ', '2.8 ', '3.5 ', '2.7 ', '2.5 ', '3.2 ',
'2.6 ', '4.5 ', '4.3 ', '4.4 ', '4.9', '2.1', '2.0', '1.8', '4.6 ',
'4.9 ', '3.0 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ', '2.2 ',
'2.0 ', '1.8 '], dtype=object)
# Convert Rating from string to float
df['rate']=df['rate'].astype(float)
display(df['rate'].unique())
array([4.1, 3.8, 3.7, 3.6, 4.6, 4. , 4.2, 3.9, 3.1, 3. , 3.2, 3.3, 2.8,
4.4, 4.3, 2.9, 3.5, 2.6, 3.4, 4.5, 2.5, 2.7, 4.7, 2.4, 2.2, 2.3,
4.8, 4.9, 2.1, 2. , 1.8])
# Create Histogram
group= [1,2,3,4,5]
plt.hist (df['rate'],group,histtype = 'bar',rwidth =0.5,color = 'b')
plt.show()
# Number of Restaurants with Rating Between 1 and 2
gr_1to2=((df['rate']>=1) & (df['rate']<2)).sum()
display (gr_1to2)
5
# Number of Restaurants with Rating Between 2 and 3
gr_2to3=((df['rate']>=2) & (df['rate']<3)).sum()
display (gr_2to3)
2211
# Number of Restaurants with Rating Between 3 and 4
gr_3to4=((df['rate']>=3) & (df['rate']<4)).sum()
display (gr_3to4)
26726
# Number of Restaurants with Rating more than 4
gr_4to5=(df['rate']>=4).sum()
display (gr_4to5)
12295
import plotly.graph_objs as go
import plotly.offline as py
# Create a Pie Chart
slices=[gr_1to2,gr_2to3,gr_3to4,gr_4to5]
labels=['Rating 1 to 2','Rating 2 to 3','Rating 3 to 4','Rating >4']
colors = ['#ff3333','#c2c2d6','#6699ff']
plt.pie(slices,colors=colors, labels=labels, autopct='%1.0f%%', pctdistance=.5, labeldistance=1.2,shadow=True)
fig = plt.gcf()
plt.title("Percentage of Restaurants according to their ratings")
fig.set_size_inches(10,10)
plt.show()
display(df['type'].value_counts())
Delivery 20431 Dine-out 14062 Desserts 2709 Cafes 1511 Drinks & nightlife 1045 Buffet 847 Pubs and bars 632 Name: type, dtype: int64
#Service Type - count Plot
# Here the two main service types are Delivery and Dine-out
sns.countplot(df['type']).set_xticklabels(sns.countplot(df['type']).get_xticklabels(), rotation=90, ha="right")
fig = plt.gcf()
fig.set_size_inches(20,12)
plt.title('Type of Service')
plt.show()
# Display Unique Cost
two_people_cost = df['cost'].unique()
display(np.sort(two_people_cost))
array([ 40., 50., 70., 80., 100., 120., 130., 150., 180.,
199., 200., 230., 240., 250., 300., 330., 350., 400.,
450., 500., 550., 600., 650., 700., 750., 800., 850.,
900., 950., 1000., 1050., 1100., 1200., 1250., 1300., 1350.,
1400., 1450., 1500., 1600., 1650., 1700., 1800., 1900., 2000.,
2100., 2200., 2300., 2400., 2500., 2600., 2700., 2800., 3000.,
3200., 3400., 3500., 3700., 4000., 4100., 4500., 5000., 6000.])
#Number of Restaurants in each group
df.groupby('cost').size()
cost
40.0 8
50.0 6
70.0 1
80.0 1
100.0 636
...
4000.0 29
4100.0 4
4500.0 2
5000.0 1
6000.0 2
Length: 63, dtype: int64
# Create a box plot
from plotly.offline import iplot
trace0=go.Box(y=df['cost'],name="accepting online orders",
marker = dict(
color = 'rgb(113, 10, 100)',
))
data=[trace0]
layout=go.Layout(title="Box plot of approximate cost",width=800,height=800,yaxis=dict(title="Price"))
fig=go.Figure(data=data,layout=layout)
py.iplot(fig)
plt.figure(figsize=(10,10))
ax=df.rest_type.value_counts()[:10]
label=df['rest_type'].value_counts()[:10].index
ax.plot(kind='pie',labels=label,autopct='%.2f')
plt.title("Type of Restaurant in City",fontsize=20,color='darkgreen')
plt.show()
plt.figure(figsize=(10,10))
ax=df.city.value_counts()[:10]
labels=df['city'].value_counts()[:10].index
plt.pie(ax,labels=labels,autopct='%.2f')
plt.title('number of restaurants in each area of bangalore',fontsize=20,color='darkblue')
plt.show()
# Restaurant Type and Count
df['rest_type'].value_counts().head(50)
Quick Bites 13871 Casual Dining 9608 Cafe 3368 Dessert Parlor 1850 Delivery 1666 Takeaway, Delivery 1278 Casual Dining, Bar 1092 Bakery 704 Bar 640 Beverage Shop 639 Food Court 498 Bar, Casual Dining 385 Lounge 381 Pub 351 Sweet Shop 346 Fine Dining 342 Casual Dining, Cafe 310 Beverage Shop, Quick Bites 239 Pub, Casual Dining 236 Bakery, Quick Bites 225 Mess 180 Cafe, Casual Dining 173 Sweet Shop, Quick Bites 171 Kiosk 152 Cafe, Bakery 146 Cafe, Dessert Parlor 144 Dessert Parlor, Cafe 144 Casual Dining, Pub 127 Bakery, Dessert Parlor 127 Microbrewery, Casual Dining 121 Dessert Parlor, Quick Bites 118 Cafe, Quick Bites 93 Takeaway 79 Food Court, Quick Bites 78 Pub, Microbrewery 76 Quick Bites, Sweet Shop 75 Beverage Shop, Dessert Parlor 75 Dessert Parlor, Beverage Shop 73 Quick Bites, Dessert Parlor 73 Bakery, Cafe 72 Quick Bites, Beverage Shop 72 Food Truck 68 Dessert Parlor, Bakery 67 Casual Dining, Microbrewery 47 Quick Bites, Bakery 43 Microbrewery, Pub 42 Pub, Cafe 40 Fine Dining, Bar 40 Club 37 Lounge, Casual Dining 37 Name: rest_type, dtype: int64
# Create Graph with top 20 restaurant type
#Casual Dining, Quick Bites and Cafe are the 3 most common types of Restaurants
plt.figure(figsize=(15,7))
rest=df['rest_type'].value_counts()[:20]
sns.barplot(rest,rest.index)
plt.title("Restaurant types")
plt.xlabel("count")
plt.show()
plt.figure(figsize=(15,7))
chains=df['name'].value_counts()[:20]
sns.barplot(x=chains,y=chains.index,palette='Set1')
plt.title("Most famous restaurant chains",size=20,pad=20)
plt.xlabel("Number of outlets",size=15)
plt.show()
# Create the Model
# display top 5 records
display(df.head())
| address | name | online_order | book_table | rate | votes | location | rest_type | cuisines | cost | reviews_list | menu_item | type | city | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | Yes | Yes | 4.1 | 775 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800.0 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | Yes | No | 4.1 | 787 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800.0 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | Yes | No | 3.8 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | No | No | 3.7 | 88 | Banashankari | Quick Bites | South Indian, North Indian | 300.0 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | No | No | 3.8 | 166 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600.0 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
#Convert the online categorical variables into a numeric format
df.online_order[df.online_order == 'Yes'] = 1
df.online_order[df.online_order == 'No'] = 0
df.online_order.value_counts()
1 27081 0 14156 Name: online_order, dtype: int64
#Change the data type to Number
df.online_order = pd.to_numeric(df.online_order)
display (df.head(20))
| address | name | online_order | book_table | rate | votes | location | rest_type | cuisines | cost | reviews_list | menu_item | type | city | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | 1 | Yes | 4.1 | 775 | Banashankari | Casual Dining | North Indian, Mughlai, Chinese | 800.0 | [('Rated 4.0', 'RATED\n A beautiful place to ... | [] | Buffet | Banashankari |
| 1 | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | 1 | No | 4.1 | 787 | Banashankari | Casual Dining | Chinese, North Indian, Thai | 800.0 | [('Rated 4.0', 'RATED\n Had been here for din... | [] | Buffet | Banashankari |
| 2 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | 1 | No | 3.8 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Buffet | Banashankari |
| 3 | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | 0 | No | 3.7 | 88 | Banashankari | Quick Bites | South Indian, North Indian | 300.0 | [('Rated 4.0', "RATED\n Great food and proper... | [] | Buffet | Banashankari |
| 4 | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | 0 | No | 3.8 | 166 | Basavanagudi | Casual Dining | North Indian, Rajasthani | 600.0 | [('Rated 4.0', 'RATED\n Very good restaurant ... | [] | Buffet | Banashankari |
| 5 | 37, 5-1, 4th Floor, Bosco Court, Gandhi Bazaar... | Timepass Dinner | 1 | No | 3.8 | 286 | Basavanagudi | Casual Dining | North Indian | 600.0 | [('Rated 3.0', 'RATED\n Food 3/5\nAmbience 3/... | [] | Buffet | Banashankari |
| 6 | 19/1, New Timberyard Layout, Beside Satellite ... | Rosewood International Hotel - Bar & Restaurant | 0 | No | 3.6 | 8 | Mysore Road | Casual Dining | North Indian, South Indian, Andhra, Chinese | 800.0 | [('Rated 5.0', 'RATED\n Awesome food ??Great ... | [] | Buffet | Banashankari |
| 7 | 2469, 3rd Floor, 24th Cross, Opposite BDA Comp... | Onesta | 1 | Yes | 4.6 | 2556 | Banashankari | Casual Dining, Cafe | Pizza, Cafe, Italian | 600.0 | [('Rated 5.0', 'RATED\n I personally really l... | [] | Cafes | Banashankari |
| 8 | 1, 30th Main Road, 3rd Stage, Banashankari, Ba... | Penthouse Cafe | 1 | No | 4.0 | 324 | Banashankari | Cafe | Cafe, Italian, Continental | 700.0 | [('Rated 3.0', "RATED\n I had been to this pl... | [] | Cafes | Banashankari |
| 9 | 2470, 21 Main Road, 25th Cross, Banashankari, ... | Smacznego | 1 | No | 4.2 | 504 | Banashankari | Cafe | Cafe, Mexican, Italian, Momos, Beverages | 550.0 | [('Rated 4.0', "RATED\n Easy to locate\nVFM 3... | [] | Cafes | Banashankari |
| 10 | 12,29 Near PES University Back Gate, D'Souza N... | CafÃÂÃÂÃÂÃÂÃÂÃÂÃÂé Down The A... | 1 | No | 4.1 | 402 | Banashankari | Cafe | Cafe | 500.0 | [('Rated 4.0', 'RATED\n We ended up here on a... | [] | Cafes | Banashankari |
| 11 | 941, 3rd FLOOR, 21st Main, 22nd Cross, Banasha... | Cafe Shuffle | 1 | Yes | 4.2 | 150 | Banashankari | Cafe | Cafe, Italian, Continental | 600.0 | [('Rated 1.0', "RATED\n \n\nHorrible. Not even... | [] | Cafes | Banashankari |
| 12 | 6th Block, 3rd Stage, Banashankari, Bangalore | The Coffee Shack | 1 | Yes | 4.2 | 164 | Banashankari | Cafe | Cafe, Chinese, Continental, Italian | 500.0 | [('Rated 4.0', "RATED\n Food - 4/5\nAmbience ... | [] | Cafes | Banashankari |
| 13 | 111, Sapphire Toys Building, 100 Feet Ring Roa... | Caf-Eleven | 0 | No | 4.0 | 424 | Banashankari | Cafe | Cafe, Continental | 450.0 | [('Rated 2.0', "RATED\n This is a hookah cafe... | [] | Cafes | Banashankari |
| 14 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | 1 | No | 3.8 | 918 | Banashankari | Cafe, Casual Dining | Cafe, Mexican, Italian | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | [] | Cafes | Banashankari |
| 15 | 2303, 21st Cross, K R Road, 2nd Stage, Banasha... | Cafe Vivacity | 1 | No | 3.8 | 90 | Banashankari | Cafe | Cafe | 650.0 | [('Rated 2.0', 'RATED\n Not so good place as ... | [] | Cafes | Banashankari |
| 16 | 241, 4th Floor, 100 Feet Ring Road, Opposite K... | Catch-up-ino | 1 | No | 3.9 | 133 | Banashankari | Cafe | Cafe, Fast Food, Continental, Chinese, Momos | 800.0 | [('Rated 1.0', "RATED\n This place is right o... | [] | Cafes | Banashankari |
| 17 | 405, 24th Cross, 9th Main, 2nd Stage, Banashan... | Kirthi's Biryani | 1 | No | 3.8 | 144 | Banashankari | Cafe | Chinese, Cafe, Italian | 700.0 | [('Rated 3.0', "RATED\n New place.. though it... | [] | Cafes | Banashankari |
| 18 | 504, CJ Venkata Das Road, Padmanabhangar, 2nd ... | T3H Cafe | 0 | No | 3.9 | 93 | Banashankari | Cafe | Cafe, Italian, American | 300.0 | [('Rated 4.0', "RATED\n Happy to see such a c... | [] | Cafes | Banashankari |
| 19 | 47, 48 &49, 3Rd Floor, 21st Main Road, 2nd Sta... | 360 Atoms Restaurant And Cafe | 1 | No | 3.1 | 13 | Banashankari | Cafe | Cafe, Chinese, Continental, Italian | 400.0 | [('Rated 5.0', 'RATED\n Friendly staffs , nic... | [] | Cafes | Banashankari |
#change the string categorical into to a categorical int
df.book_table[df.book_table == 'Yes'] = 1
df.book_table[df.book_table == 'No'] = 0
df.book_table = pd.to_numeric(df.book_table)
display(df.book_table.value_counts())
0 34938 1 6299 Name: book_table, dtype: int64
#Label encode the categorical variables to make it easier to build algorithm
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df.location = le.fit_transform(df.location)
df.rest_type = le.fit_transform(df.rest_type)
df.cuisines = le.fit_transform(df.cuisines)
df.menu_item = le.fit_transform(df.menu_item)
display (df.head(20))
| address | name | online_order | book_table | rate | votes | location | rest_type | cuisines | cost | reviews_list | menu_item | type | city | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 942, 21st Main Road, 2nd Stage, Banashankari, ... | Jalsa | 1 | 1 | 4.1 | 775 | 1 | 24 | 1894 | 800.0 | [('Rated 4.0', 'RATED\n A beautiful place to ... | 8242 | Buffet | Banashankari |
| 1 | 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... | Spice Elephant | 1 | 0 | 4.1 | 787 | 1 | 24 | 816 | 800.0 | [('Rated 4.0', 'RATED\n Had been here for din... | 8242 | Buffet | Banashankari |
| 2 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | 1 | 0 | 3.8 | 918 | 1 | 19 | 653 | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | 8242 | Buffet | Banashankari |
| 3 | 1st Floor, Annakuteera, 3rd Stage, Banashankar... | Addhuri Udupi Bhojana | 0 | 0 | 3.7 | 88 | 1 | 73 | 2221 | 300.0 | [('Rated 4.0', "RATED\n Great food and proper... | 8242 | Buffet | Banashankari |
| 4 | 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... | Grand Village | 0 | 0 | 3.8 | 166 | 4 | 24 | 1921 | 600.0 | [('Rated 4.0', 'RATED\n Very good restaurant ... | 8242 | Buffet | Banashankari |
| 5 | 37, 5-1, 4th Floor, Bosco Court, Gandhi Bazaar... | Timepass Dinner | 1 | 0 | 3.8 | 286 | 4 | 24 | 1585 | 600.0 | [('Rated 3.0', 'RATED\n Food 3/5\nAmbience 3/... | 8242 | Buffet | Banashankari |
| 6 | 19/1, New Timberyard Layout, Beside Satellite ... | Rosewood International Hotel - Bar & Restaurant | 0 | 0 | 3.6 | 8 | 56 | 24 | 1938 | 800.0 | [('Rated 5.0', 'RATED\n Awesome food ??Great ... | 8242 | Buffet | Banashankari |
| 7 | 2469, 3rd Floor, 24th Cross, Opposite BDA Comp... | Onesta | 1 | 1 | 4.6 | 2556 | 1 | 26 | 2028 | 600.0 | [('Rated 5.0', 'RATED\n I personally really l... | 8242 | Cafes | Banashankari |
| 8 | 1, 30th Main Road, 3rd Stage, Banashankari, Ba... | Penthouse Cafe | 1 | 0 | 4.0 | 324 | 1 | 16 | 629 | 700.0 | [('Rated 3.0', "RATED\n I had been to this pl... | 8242 | Cafes | Banashankari |
| 9 | 2470, 21 Main Road, 25th Cross, Banashankari, ... | Smacznego | 1 | 0 | 4.2 | 504 | 1 | 16 | 654 | 550.0 | [('Rated 4.0', "RATED\n Easy to locate\nVFM 3... | 8242 | Cafes | Banashankari |
| 10 | 12,29 Near PES University Back Gate, D'Souza N... | CafÃÂÃÂÃÂÃÂÃÂÃÂÃÂé Down The A... | 1 | 0 | 4.1 | 402 | 1 | 16 | 475 | 500.0 | [('Rated 4.0', 'RATED\n We ended up here on a... | 8242 | Cafes | Banashankari |
| 11 | 941, 3rd FLOOR, 21st Main, 22nd Cross, Banasha... | Cafe Shuffle | 1 | 1 | 4.2 | 150 | 1 | 16 | 629 | 600.0 | [('Rated 1.0', "RATED\n \n\nHorrible. Not even... | 8242 | Cafes | Banashankari |
| 12 | 6th Block, 3rd Stage, Banashankari, Bangalore | The Coffee Shack | 1 | 1 | 4.2 | 164 | 1 | 16 | 532 | 500.0 | [('Rated 4.0', "RATED\n Food - 4/5\nAmbience ... | 8242 | Cafes | Banashankari |
| 13 | 111, Sapphire Toys Building, 100 Feet Ring Roa... | Caf-Eleven | 0 | 0 | 4.0 | 424 | 1 | 16 | 538 | 450.0 | [('Rated 2.0', "RATED\n This is a hookah cafe... | 8242 | Cafes | Banashankari |
| 14 | 1112, Next to KIMS Medical College, 17th Cross... | San Churro Cafe | 1 | 0 | 3.8 | 918 | 1 | 19 | 653 | 800.0 | [('Rated 3.0', "RATED\n Ambience is not that ... | 8242 | Cafes | Banashankari |
| 15 | 2303, 21st Cross, K R Road, 2nd Stage, Banasha... | Cafe Vivacity | 1 | 0 | 3.8 | 90 | 1 | 16 | 475 | 650.0 | [('Rated 2.0', 'RATED\n Not so good place as ... | 8242 | Cafes | Banashankari |
| 16 | 241, 4th Floor, 100 Feet Ring Road, Opposite K... | Catch-up-ino | 1 | 0 | 3.9 | 133 | 1 | 16 | 604 | 800.0 | [('Rated 1.0', "RATED\n This place is right o... | 8242 | Cafes | Banashankari |
| 17 | 405, 24th Cross, 9th Main, 2nd Stage, Banashan... | Kirthi's Biryani | 1 | 0 | 3.8 | 144 | 1 | 16 | 720 | 700.0 | [('Rated 3.0', "RATED\n New place.. though it... | 8242 | Cafes | Banashankari |
| 18 | 504, CJ Venkata Das Road, Padmanabhangar, 2nd ... | T3H Cafe | 0 | 0 | 3.9 | 93 | 1 | 16 | 623 | 300.0 | [('Rated 4.0', "RATED\n Happy to see such a c... | 8242 | Cafes | Banashankari |
| 19 | 47, 48 &49, 3Rd Floor, 21st Main Road, 2nd Sta... | 360 Atoms Restaurant And Cafe | 1 | 0 | 3.1 | 13 | 1 | 16 | 532 | 400.0 | [('Rated 5.0', 'RATED\n Friendly staffs , nic... | 8242 | Cafes | Banashankari |
my_data=df.iloc[:,[2,3,4,5,6,7,8,9,11]]
my_data.to_csv('Zomato_df.csv')
display (my_data)
| online_order | book_table | rate | votes | location | rest_type | cuisines | cost | menu_item | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 4.1 | 775 | 1 | 24 | 1894 | 800.0 | 8242 |
| 1 | 1 | 0 | 4.1 | 787 | 1 | 24 | 816 | 800.0 | 8242 |
| 2 | 1 | 0 | 3.8 | 918 | 1 | 19 | 653 | 800.0 | 8242 |
| 3 | 0 | 0 | 3.7 | 88 | 1 | 73 | 2221 | 300.0 | 8242 |
| 4 | 0 | 0 | 3.8 | 166 | 4 | 24 | 1921 | 600.0 | 8242 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 41232 | 0 | 0 | 3.7 | 34 | 88 | 25 | 1785 | 800.0 | 8242 |
| 41233 | 0 | 0 | 2.5 | 81 | 88 | 25 | 101 | 800.0 | 8242 |
| 41234 | 0 | 0 | 3.6 | 27 | 88 | 6 | 866 | 1500.0 | 8242 |
| 41235 | 0 | 1 | 4.3 | 236 | 26 | 6 | 1207 | 2500.0 | 8242 |
| 41236 | 0 | 0 | 3.4 | 13 | 26 | 7 | 1231 | 1500.0 | 8242 |
41237 rows × 9 columns
#Create independent Variable
x = df.iloc[:,[2,3,5,6,7,8,9,11]]
x.head()
| online_order | book_table | votes | location | rest_type | cuisines | cost | menu_item | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 775 | 1 | 24 | 1894 | 800.0 | 8242 |
| 1 | 1 | 0 | 787 | 1 | 24 | 816 | 800.0 | 8242 |
| 2 | 1 | 0 | 918 | 1 | 19 | 653 | 800.0 | 8242 |
| 3 | 0 | 0 | 88 | 1 | 73 | 2221 | 300.0 | 8242 |
| 4 | 0 | 0 | 166 | 4 | 24 | 1921 | 600.0 | 8242 |
#Create Dependent Variable
y = df['rate']
display(y)
0 4.1
1 4.1
2 3.8
3 3.7
4 3.8
...
41232 3.7
41233 2.5
41234 3.6
41235 4.3
41236 3.4
Name: rate, Length: 41237, dtype: float64
# Train Test Split
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=.3,random_state=10)
# Create Linear Regression Model
from sklearn.linear_model import LinearRegression
lr_model=LinearRegression()
lr_model.fit(x_train,y_train)
LinearRegression()
# Display Accuracy
from sklearn.metrics import r2_score
y_pred=lr_model.predict(x_test)
display (r2_score(y_test,y_pred))
0.30080021393797163
# Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor
RF_Model=RandomForestRegressor(n_estimators=650,random_state=245,min_samples_leaf=.0001)
RF_Model.fit(x_train,y_train)
RandomForestRegressor(min_samples_leaf=0.0001, n_estimators=650,
random_state=245)
y_predict=RF_Model.predict(x_test)
display (r2_score(y_test,y_predict))
0.8743074009718356
from sklearn.linear_model import Lasso
lr=Lasso()
lr
Lasso()
lr.fit(x_train,y_train)
Lasso()
y_predict=lr.predict(x_test)
display (r2_score(y_test,y_pred))
0.30080021393797163
from sklearn.svm import SVR
svr=SVR()
svr.fit(x_train,y_train)
SVR()
y_predict=svr.predict(x_test)
display (r2_score(y_test,y_pred))
0.30080021393797163
import xgboost as xgb
xgb_model=xgb.XGBRegressor()
xgb_model.fit(x_train,y_train)
xgb_pred=xgb_model.predict(x_test)
display (r2_score(y_test,xgb_pred))
0.7391740232273454
from sklearn.neighbors import KNeighborsRegressor
knn =KNeighborsRegressor(n_neighbors =5 , metric = 'minkowski', p= 1)
knn.fit (x_train,y_train )
KNeighborsRegressor(p=1)
y_pred = knn.predict (x_test)
y_pred
array([3.94, 3.58, 3. , ..., 3.66, 4.48, 3.26])
display (r2_score(y_test,y_pred))
0.673740950903428
#Ordinary Least Squares regression (OLS) is a common technique for estimating coefficients of linear regression equations
#which describe the relationship between one or more independent quantitative variables and a dependent variable
x=x.astype('float64')
import statsmodels.api as sm
reg_ols = sm.OLS (endog = y, exog = x)
reg_ols = reg_ols.fit()
print (reg_ols.summary())
OLS Regression Results
=======================================================================================
Dep. Variable: rate R-squared (uncentered): 0.960
Model: OLS Adj. R-squared (uncentered): 0.960
Method: Least Squares F-statistic: 1.232e+05
Date: Tue, 02 Aug 2022 Prob (F-statistic): 0.00
Time: 00:42:06 Log-Likelihood: -46482.
No. Observations: 41237 AIC: 9.298e+04
Df Residuals: 41229 BIC: 9.305e+04
Df Model: 8
Covariance Type: nonrobust
================================================================================
coef std err t P>|t| [0.025 0.975]
--------------------------------------------------------------------------------
online_order 0.9361 0.007 134.118 0.000 0.922 0.950
book_table 0.0076 0.013 0.572 0.567 -0.018 0.034
votes 0.0002 4.6e-06 34.097 0.000 0.000 0.000
location 0.0064 0.000 46.179 0.000 0.006 0.007
rest_type 0.0083 0.000 56.430 0.000 0.008 0.009
cuisines 0.0002 5.59e-06 35.589 0.000 0.000 0.000
cost 0.0008 1.01e-05 77.360 0.000 0.001 0.001
menu_item 0.0002 1.24e-06 179.839 0.000 0.000 0.000
==============================================================================
Omnibus: 96.439 Durbin-Watson: 1.516
Prob(Omnibus): 0.000 Jarque-Bera (JB): 95.812
Skew: 0.110 Prob(JB): 1.57e-21
Kurtosis: 2.913 Cond. No. 2.74e+04
==============================================================================
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[3] The condition number is large, 2.74e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
from sklearn.linear_model import LinearRegression
reg= LinearRegression()
reg.fit(x_train,y_train)
LinearRegression()
y_pred= reg.predict(x_test)
print (y_pred)
[3.6554235 3.45375423 3.46836078 ... 3.53803275 4.72090704 3.45206852]
r2_score (y_test,y_pred)
0.30080021393797163
from sklearn.tree import DecisionTreeRegressor
dt =DecisionTreeRegressor()
dt.fit(x,y)
DecisionTreeRegressor()
y_pred= reg.predict(x_test)
print (y_pred)
r2_score (y_test,y_pred)
[3.6554235 3.45375423 3.46836078 ... 3.53803275 4.72090704 3.45206852]
0.30080021393797163
from sklearn.preprocessing import PolynomialFeatures
polynom = PolynomialFeatures(degree =1)
polynom.fit_transform(x)
array([[1.000e+00, 1.000e+00, 1.000e+00, ..., 1.894e+03, 8.000e+02,
8.242e+03],
[1.000e+00, 1.000e+00, 0.000e+00, ..., 8.160e+02, 8.000e+02,
8.242e+03],
[1.000e+00, 1.000e+00, 0.000e+00, ..., 6.530e+02, 8.000e+02,
8.242e+03],
...,
[1.000e+00, 0.000e+00, 0.000e+00, ..., 8.660e+02, 1.500e+03,
8.242e+03],
[1.000e+00, 0.000e+00, 1.000e+00, ..., 1.207e+03, 2.500e+03,
8.242e+03],
[1.000e+00, 0.000e+00, 0.000e+00, ..., 1.231e+03, 1.500e+03,
8.242e+03]])
y_pred= reg.predict(x_test)
print (y_pred)
[3.6554235 3.45375423 3.46836078 ... 3.53803275 4.72090704 3.45206852]
display (r2_score(y_test,y_pred))
0.30080021393797163
#Preparing Extra Tree Regression
from sklearn.ensemble import ExtraTreesRegressor
ET_Model=ExtraTreesRegressor(n_estimators = 120)
ET_Model.fit(x_train,y_train)
ExtraTreesRegressor(n_estimators=120)
y_predict=ET_Model.predict(x_test)
from sklearn.metrics import r2_score
display(r2_score(y_test,y_predict))
0.9323350619857341
#Use pickle to save our model so that we can use it later
import pickle
# Saving model to disk
pickle.dump(ET_Model, open('model.pkl','wb'))
model=pickle.load(open('model.pkl','rb'))
model
ExtraTreesRegressor(n_estimators=120)
model.predict(x)
array([4.1, 4.1, 3.8, ..., 3.6, 4.3, 3.4])